Conversation
| seen = set() | ||
| deduped = [] | ||
| for pkg in extra_packages: | ||
| if pkg not in seen: | ||
| seen.add(pkg) | ||
| deduped.append(pkg) |
There was a problem hiding this comment.
Does the order matter? If not, this could be simplified to the following:
extra_packages = list(set(extra_packages))
| "NvTensorRTRTXExecutionProvider", | ||
| ] | ||
|
|
||
| SUPPORTED_PRECISIONS = [ |
| # Constants | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
| SUPPORTED_PROVIDERS = [ |
| CMD_OPTIMIZE = "optimize" | ||
| CMD_QUANTIZE = "quantize" | ||
| CMD_FINETUNE = "finetune" | ||
| CMD_CAPTURE_ONNX_GRAPH = "capture_onnx_graph" | ||
| CMD_BENCHMARK = "benchmark" | ||
| CMD_DIFFUSION_LORA = "diffusion_lora" | ||
| CMD_EXPLORE_PASSES = "explore_passes" | ||
| CMD_VALIDATE_CONFIG = "validate_config" | ||
| CMD_RUN_CONFIG = "run_config" |
There was a problem hiding this comment.
Combine into a named StrEnum?
| while True: | ||
| try: | ||
| line = await proc.stderr.readline() | ||
| except ValueError: | ||
| # Line exceeded even the 10MB limit — skip it | ||
| continue | ||
| if not line: | ||
| break | ||
| decoded = line.decode("utf-8", errors="replace").rstrip() | ||
| if decoded: | ||
| # Truncate extremely long lines for display (e.g. base64 blobs) | ||
| if len(decoded) > 500: | ||
| decoded = decoded[:500] + "... (truncated)" | ||
| _job_log(job_id, decoded) |
There was a problem hiding this comment.
This loop will block indefinitely if nothing is written to stderr. Also, an empty line will break out of the loop, which isn't the intended behavior. Check explicitly for None:
if line is None: break
| stdout=asyncio.subprocess.PIPE, | ||
| stderr=asyncio.subprocess.PIPE, | ||
| env=env, | ||
| limit=10 * 1024 * 1024, # 10 MB line limit (default 64KB is too small for olive output) |
There was a problem hiding this comment.
You could start a worker thread to read proc.stdout and not be limited by the size. That approach would also give the user live progress updates rather than waiting until the process completes.
| elif command == CMD_QUANTIZE: | ||
| algorithm = kwargs.get("algorithm", "rtn") | ||
| impl = kwargs.get("implementation", "olive") | ||
| if impl == "bnb": | ||
| extras.add("bnb") | ||
| elif impl == "inc": | ||
| extras.add("inc") | ||
| elif impl == "autogptq" or algorithm == "gptq": | ||
| extra_packages.extend(["auto-gptq", "optimum", "datasets"]) | ||
| elif impl == "awq" or algorithm == "awq": | ||
| extra_packages.append("autoawq") | ||
| # Static quantization needs calibration data | ||
| if algorithm != "rtn": | ||
| extra_packages.append("datasets") |
There was a problem hiding this comment.
This information is available in olive_config.json. I'd rather not duplicate it here.
|
|
||
|
|
||
| @mcp.tool() | ||
| async def detect_hardware() -> dict: |
There was a problem hiding this comment.
This is all supported by the Python package `psutil`. Can we just take a dependency on that module rather than duplicating effort?
| job_log_fn(job_id, f"Reusing cached venv ({key})") | ||
|
|
||
| _touch_venv(venv_path) | ||
| return python_path |
There was a problem hiding this comment.
Might want to add a simple `python -m pip list` to show the status of the created environment.
Describe your changes
Add olive mcp server
Checklist before requesting a review
- lintrunner -a
(Optional) Issue link